set.seed(1)

addTimeStamp <- function(saveInfo, tstart, info){
  timeDiff <- abs(round(difftime(Sys.time(), tstart, units = "mins"), 3))
  print(paste0("Adding Time Stamp : ", info , ", ", timeDiff, " minutes from start..."))
  saveInfo$timeStamps[[length(saveInfo$timeStamps) + 1]] <- list(
    info = info, 
    time = timeDiff
  )
  saveInfo
}

print(Sys.time())
## [1] "2020-02-14 11:54:09 PST"
#Print Input Params
print(params)
## $jobAttempt
## [1] 1
## 
## $input
## [1] "/scratch/users/jgranja/BenchmarksLarge/outputSmall/Signac/Projects/Heme_Small_Rep1/Heme_Small_Rep1-Signac-4-Clustering-Save.rds"
## 
## $project
## [1] "Heme_Small_Rep1"
## 
## $projectMetadata
## [1] "Project_Metadata.tsv"
## 
## $threads
## [1] 20
## 
## $out1
## [1] "/scratch/users/jgranja/BenchmarksLarge/outputSmall/Signac/Projects/Heme_Small_Rep1/Heme_Small_Rep1-Signac-5-Embedding-Benchmark.rds"
## 
## $out2
## [1] "/scratch/users/jgranja/BenchmarksLarge/outputSmall/Signac/Projects/Heme_Small_Rep1/Heme_Small_Rep1-Signac-5-Embedding-Save.rds"
#Get Project Metadata
df <- data.frame(readr::read_tsv(paste0(params$projectMetadata)))
## Parsed with column specification:
## cols(
##   Project = col_character(),
##   Metadata = col_character()
## )
#Get Sample Metadata
metadata <- data.frame(readr::read_tsv(df[paste0(df[,1]) == paste0(params$project), 2]))
## Parsed with column specification:
## cols(
##   Sample = col_character(),
##   Genome = col_character(),
##   Path = col_character()
## )
print(metadata)
##    Sample Genome
## 1 BMMC_R1   Hg19
## 2 BMMC_R2   Hg19
## 3 CD34_R1   Hg19
## 4 CD34_R2   Hg19
## 5 CD34_R3   Hg19
##                                                                                                                             Path
## 1 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/BMMC_Healthy_10x_R1.fragments.tsv.gz
## 2 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/BMMC_Healthy_10x_R2.fragments.tsv.gz
## 3           /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/CD34_A_R1.fragments.tsv.gz
## 4           /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/CD34_A_R2.fragments.tsv.gz
## 5             /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/CD34_R3.fragments.tsv.gz
#Input Files
#inputFiles <- paste0(metadata$Path)
#names(inputFiles) <- metadata$Sample
#print(inputFiles)

#Genome
genome <- tolower(paste0(metadata$Genome[1]))

#Working Directory Relative to project
owd <- getwd()
setwd(dirname(paste0(params$out1)))

#Start Time
saveInfo <- list(timeStamps = 
  list(
    list(info = "Start", time = 0)
  ), 
  info = list()
)

#Load R Libraries
library(Signac)
library(Seurat)
library(GenomeInfoDb)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
library(GenomicRanges)
library(ggplot2)

#1. Seurat Object
seuratObj <- readRDS(params$input)
print(seuratObj)
## An object of class Seurat 
## 590637 features across 28388 samples within 1 assay 
## Active assay: peaks (590637 features)
##  1 dimensional reduction calculated: lsi
print(paste0("Object Size = ", round(object.size(seuratObj)/10^9, 4), " GB"))
## [1] "Object Size = 6.149 GB"
tstart <- Sys.time()
saveInfo <- addTimeStamp(saveInfo, tstart, info = "Input")
## [1] "Adding Time Stamp : Input, 0 minutes from start..."
#2. UMAP
seuratObj <- RunUMAP(object = seuratObj, reduction = 'lsi', dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:55:16 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:55:16 Read 28388 rows and found 30 numeric columns
## 11:55:16 Using Annoy for neighbor search, n_neighbors = 30
## 11:55:16 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:55:22 Writing NN index file to temp file /tmp/Rtmpj9VA3Z/file2c21c45a8126b
## 11:55:22 Searching Annoy index using 1 thread, search_k = 3000
## 11:55:33 Annoy recall = 100%
## 11:55:34 Commencing smooth kNN distance calibration using 1 thread
## 11:55:38 Initializing from normalized Laplacian + noise
## 11:55:39 Commencing optimization for 200 epochs, with 1183462 positive edges
## 11:56:13 Optimization finished
print(paste0("Object Size = ", round(object.size(seuratObj)/10^9, 4), " GB"))
## [1] "Object Size = 6.152 GB"
saveInfo <- addTimeStamp(saveInfo, tstart, info = "UMAP")
## [1] "Adding Time Stamp : UMAP, 0.947 minutes from start..."
#3. Plot UMAP
DimPlot(object = seuratObj, group.by = 'sampleName', label = TRUE, reduction = "umap") + NoLegend()
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.

DimPlot(object = seuratObj, label = TRUE, reduction = "umap") + NoLegend()

saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("plotUMAP"))
## [1] "Adding Time Stamp : plotUMAP, 0.982 minutes from start..."
#4. TSNE
seuratObj <- RunTSNE(object = seuratObj, reduction = 'lsi', dims = 1:30)
print(paste0("Object Size = ", round(object.size(seuratObj)/10^9, 4), " GB"))
## [1] "Object Size = 6.1574 GB"
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("TSNE"))
## [1] "Adding Time Stamp : TSNE, 3.199 minutes from start..."
#5. Plot TSNE
DimPlot(object = seuratObj, group.by = 'sampleName', label = TRUE, reduction = "tsne") + NoLegend()

DimPlot(object = seuratObj, label = TRUE, reduction = "tsne") + NoLegend()

saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("plotTSNE"))
## [1] "Adding Time Stamp : plotTSNE, 3.23 minutes from start..."
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("Completed"))
## [1] "Adding Time Stamp : Completed, 3.23 minutes from start..."
print(Sys.time() - tstart)
## Time difference of 3.230458 mins
#Set Original Working Directory
#Save Output
setwd(owd)
saveRDS(saveInfo, params$out1)
saveRDS(seuratObj, params$out2)

#Print Session Info
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.2.19/lib/libopenblasp-r0.2.19.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
## [1] ggplot2_3.2.1        GenomicRanges_1.38.0 GenomeInfoDb_1.22.0 
## [4] IRanges_2.20.2       S4Vectors_0.24.3     BiocGenerics_0.32.0 
## [7] Seurat_3.1.2         Signac_0.2.2        
## 
## loaded via a namespace (and not attached):
##   [1] sn_1.5-5                    plyr_1.8.5                 
##   [3] igraph_1.2.4.2              lazyeval_0.2.2             
##   [5] splines_3.6.1               BiocParallel_1.20.1        
##   [7] listenv_0.8.0               TH.data_1.0-10             
##   [9] digest_0.6.23               htmltools_0.4.0            
##  [11] gdata_2.18.0                magrittr_1.5               
##  [13] memoise_1.1.0               BSgenome_1.54.0            
##  [15] cluster_2.1.0               ROCR_1.0-7                 
##  [17] ggfittext_0.8.1             globals_0.12.5             
##  [19] Biostrings_2.54.0           readr_1.3.1                
##  [21] RcppParallel_4.4.4          matrixStats_0.55.0         
##  [23] R.utils_2.9.2               sandwich_2.5-1             
##  [25] prettyunits_1.1.1           colorspace_1.4-1           
##  [27] blob_1.2.1                  rappdirs_0.3.1             
##  [29] ggrepel_0.8.1               xfun_0.12                  
##  [31] dplyr_0.8.4                 crayon_1.3.4               
##  [33] RCurl_1.98-1.1              jsonlite_1.6               
##  [35] survival_2.44-1.1           zoo_1.8-7                  
##  [37] ape_5.3                     glue_1.3.1                 
##  [39] gtable_0.3.0                zlibbioc_1.32.0            
##  [41] XVector_0.26.0              leiden_0.3.2               
##  [43] DelayedArray_0.12.2         future.apply_1.4.0         
##  [45] scales_1.1.0                mvtnorm_1.0-12             
##  [47] DBI_1.1.0                   bibtex_0.4.2.2             
##  [49] Rcpp_1.0.3                  metap_1.3                  
##  [51] plotrix_3.7-7               viridisLite_0.3.0          
##  [53] progress_1.2.2              reticulate_1.14            
##  [55] bit_1.1-15.1                rsvd_1.0.2                 
##  [57] SDMTools_1.1-221.2          tsne_0.1-3                 
##  [59] htmlwidgets_1.5.1           httr_1.4.1                 
##  [61] gplots_3.0.1.2              RColorBrewer_1.1-2         
##  [63] TFisher_0.2.0               ica_1.0-2                  
##  [65] farver_2.0.3                pkgconfig_2.0.3            
##  [67] XML_3.99-0.3                R.methodsS3_1.7.1          
##  [69] ggseqlogo_0.1               uwot_0.1.5                 
##  [71] labeling_0.3                tidyselect_1.0.0           
##  [73] rlang_0.4.4                 reshape2_1.4.3             
##  [75] AnnotationDbi_1.48.0        munsell_0.5.0              
##  [77] tools_3.6.1                 RSQLite_2.2.0              
##  [79] ggridges_0.5.2              evaluate_0.14              
##  [81] stringr_1.4.0               yaml_2.2.1                 
##  [83] npsurv_0.4-0                knitr_1.27                 
##  [85] bit64_0.9-7                 fitdistrplus_1.0-14        
##  [87] caTools_1.18.0              purrr_0.3.3                
##  [89] RANN_2.6.1                  pbapply_1.4-2              
##  [91] future_1.16.0               nlme_3.1-140               
##  [93] R.oo_1.23.0                 biomaRt_2.40.5             
##  [95] compiler_3.6.1              plotly_4.9.1               
##  [97] png_0.1-7                   lsei_1.2-0                 
##  [99] tibble_2.1.3                stringi_1.4.5              
## [101] RSpectra_0.16-0             GenomicFeatures_1.36.4     
## [103] lattice_0.20-38             Matrix_1.2-17              
## [105] multtest_2.42.0             vctrs_0.2.2                
## [107] mutoss_0.1-12               pillar_1.4.3               
## [109] lifecycle_0.1.0             Rdpack_0.11-1              
## [111] lmtest_0.9-37               RcppAnnoy_0.0.14           
## [113] data.table_1.12.8           cowplot_1.0.0              
## [115] bitops_1.0-6                irlba_2.3.3                
## [117] gbRd_0.4-11                 gggenes_0.4.0              
## [119] patchwork_1.0.0             rtracklayer_1.46.0         
## [121] R6_2.4.1                    KernSmooth_2.23-15         
## [123] gridExtra_2.3               codetools_0.2-16           
## [125] MASS_7.3-51.4               gtools_3.8.1               
## [127] assertthat_0.2.1            SummarizedExperiment_1.16.1
## [129] withr_2.1.2                 GenomicAlignments_1.22.1   
## [131] sctransform_0.2.1           Rsamtools_2.2.1            
## [133] mnormt_1.5-5                multcomp_1.4-12            
## [135] GenomeInfoDbData_1.2.2      hms_0.5.3                  
## [137] grid_3.6.1                  tidyr_1.0.2                
## [139] rmarkdown_2.1               Rtsne_0.15                 
## [141] numDeriv_2016.8-1.1         Biobase_2.46.0